Introduce a product classifier #6680

Draft · wants to merge 3 commits into base: main
Changes from all commits
71 changes: 53 additions & 18 deletions kitsune/llm/questions/classifiers.py
@@ -3,9 +3,16 @@
from django.db import models
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

from kitsune.llm.questions.prompt import spam_parser, spam_prompt, topic_parser, topic_prompt
from kitsune.llm.questions.prompt import (
product_parser,
product_prompt,
spam_parser,
spam_prompt,
topic_parser,
topic_prompt,
)
from kitsune.llm.utils import get_llm
from kitsune.products.utils import get_taxonomy
from kitsune.products.utils import get_products, get_taxonomy

DEFAULT_LLM_MODEL = "gemini-2.5-flash-preview-04-17"
HIGH_CONFIDENCE_THRESHOLD = 75
@@ -39,32 +46,60 @@ def classify_question(question: "Question") -> dict[str, Any]:
}

spam_detection_chain = spam_prompt | llm | spam_parser
product_classification_chain = product_prompt | llm | product_parser
topic_classification_chain = topic_prompt | llm | topic_parser

def handle_spam(payload: dict[str, Any], spam_result: dict[str, Any]) -> dict[str, Any]:
"""Handle spam classification with potential product reclassification."""
confidence = spam_result.get("confidence", 0)
match confidence:
case _ if confidence >= HIGH_CONFIDENCE_THRESHOLD:
action = ModerationAction.SPAM
case _ if confidence > LOW_CONFIDENCE_THRESHOLD:
action = ModerationAction.FLAG_REVIEW
case _:
action = ModerationAction.NOT_SPAM

if not ((action == ModerationAction.SPAM) and spam_result.get("maybe_misclassified")):
return {"action": action, "product_result": {}}

payload["products"] = get_products(output_format="JSON")
product_result = product_classification_chain.invoke(payload)
new_product = product_result.get("product")

if new_product and new_product != payload["product"]:
payload["product"] = new_product
payload["topics"] = get_taxonomy(
new_product, include_metadata=["description", "examples"], output_format="JSON"
)
topic_result = topic_classification_chain.invoke(payload)
return {
"action": ModerationAction.NOT_SPAM,
"product_result": product_result,
"topic_result": topic_result,
}
else:
return {
"action": ModerationAction.SPAM,
"product_result": product_result,
}

def decision_lambda(payload: dict[str, Any]) -> dict[str, Any]:
spam_result: dict[str, Any] = payload["spam_result"]
confidence: int = spam_result.get("confidence", 0)
is_spam: bool = spam_result.get("is_spam", False)
result = {
"action": ModerationAction.NOT_SPAM,

base_result = {
"spam_result": spam_result,
"product_result": {},
"topic_result": {},
}

if is_spam:
match confidence:
case _ if confidence >= HIGH_CONFIDENCE_THRESHOLD:
result["action"] = ModerationAction.SPAM
case _ if (
confidence > LOW_CONFIDENCE_THRESHOLD
and confidence < HIGH_CONFIDENCE_THRESHOLD
):
result["action"] = ModerationAction.FLAG_REVIEW

if result["action"] == ModerationAction.NOT_SPAM:
result["topic_result"] = topic_classification_chain.invoke(payload)

return result
spam_handling = handle_spam(payload, spam_result)
return {**base_result, **spam_handling}

topic_result = topic_classification_chain.invoke(payload)
return {**base_result, "topic_result": topic_result}

pipeline = RunnablePassthrough.assign(spam_result=spam_detection_chain) | RunnableLambda(
decision_lambda
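For review context, a minimal sketch of the pipeline's end-to-end behavior. The payload construction and the return statement of `classify_question` are collapsed in the hunk above, so the invocation and result shape below are inferred from `decision_lambda` and `handle_spam`, not confirmed by the diff:

```python
# Illustrative sketch only: the payload keys and the return statement of
# classify_question() are collapsed above, so this shows assumed wiring.
from kitsune.llm.questions.classifiers import classify_question

result = classify_question(question)  # question: a kitsune Question instance

# Based on decision_lambda and handle_spam, `result` should look like:
# {
#     "action": ModerationAction.SPAM | FLAG_REVIEW | NOT_SPAM,
#     "spam_result": {"is_spam": ..., "confidence": ..., "reason": ...,
#                     "maybe_misclassified": ...},
#     "product_result": {...},  # non-empty only when a spam verdict triggered
#                               # product reclassification
#     "topic_result": {...},    # non-empty whenever the question ends up NOT_SPAM
# }
```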
91 changes: 90 additions & 1 deletion kitsune/llm/questions/prompt.py
@@ -16,7 +16,7 @@
- Encourages illegal, unethical, or dangerous behavior.
- Promotes political views or propaganda unrelated to the product.
- Is extremely short (e.g., less than 10 words), overly vague, or the primary purpose of the question cannot be understood from the text.
- Intent or relevance to Mozilla's "{product}" cannot be determined.
- Its intent cannot be determined.
- Contains excessive random symbols, emojis, or gibberish text.
- Contains QR codes or links/images directing users off-site.
- Clearly unrelated to Mozilla's "{product}" product features, functionality or purpose.
@@ -29,11 +29,56 @@
- `0` = Extremely uncertain.
- `100` = Completely certain.
4. Provide a concise explanation supporting your decision.
5. **Determine if the question was misclassified due to the wrong product:** True only if this is a legitimate Mozilla support request that
doesn't relate to "{product}" but clearly relates to another Mozilla product.

# Response format
{format_instructions}
"""

PRODUCT_INSTRUCTIONS = """
# Role and Goal
You are a specialized product reclassification agent for Mozilla's support forums.
Your task is to evaluate user-submitted questions previously flagged as spam and determine
if they should instead be reassigned to a specific Mozilla product category.

# Available Mozilla Products
You MUST select exactly one product from the following JSON-formatted list if reassignment is appropriate:
- **title**: Name of the product.
- **description**: A short description of the product.

```json
{products}
```

# When to Reassign a Question
Reassign a question to a specific product ONLY if **all** of these criteria apply:
- The question explicitly mentions or clearly relates to the product's distinctive features or functionalities.
- The question includes technical terms, error messages, or workflows unique to the specific product.
- You are highly confident the original spam classification resulted from incorrect product selection.
- The content represents a legitimate support request, not promotional or spam content.

# When NOT to Reassign
Do NOT reassign the question if **any** of these criteria apply:
- The content is genuinely promotional, spam, inappropriate, or clearly unrelated to Mozilla products.
- You cannot confidently determine the relevant Mozilla product.
- The question equally involves multiple Mozilla products with no clear primary focus.
- The original spam classification appears correct, regardless of product selection.

# Task Instructions
Given a user-submitted question previously flagged as spam, strictly follow these steps:
1. **Carefully Evaluate** whether the question clearly relates to a specific Mozilla product.
2. **Spam Verification** - Confirm explicitly that the content is not promotional or actual spam.
3. **Determine Reassignment:** If the question meets **all** reassignment criteria, explicitly select the most appropriate product. Otherwise, do not reassign.
4. Indicate your **confidence** in your decision (0-100), with higher scores indicating stronger certainty:
- `0` = Extremely uncertain.
- `100` = Completely certain.
5. Provide a concise explanation (1–2 sentences) clearly supporting your decision.

# Response Format
{format_instructions}
"""

TOPIC_INSTRUCTIONS = """
# Role and goal
You are a content classification agent specialized in Mozilla's "{product}" product support forums.
@@ -100,6 +145,14 @@
type="str",
description="The reason for identifying the question as spam or not spam.",
),
ResponseSchema(
name="maybe_misclassified",
type="bool",
description=(
"True if this appears to be a legitimate Mozilla support request"
" that was flagged as spam solely due to incorrect product categorization."
),
),
)
)

@@ -119,6 +172,34 @@
)
)

product_parser = StructuredOutputParser.from_response_schemas(
(
ResponseSchema(
name="product",
type="str",
description=(
"The Mozilla product selected for reassignment or null if no reassignment"
" should be made."
),
),
ResponseSchema(
name="confidence",
type="int",
description=(
"An integer from 0 to 100 that indicates the level of confidence in the"
" product reassignment decision, with 0 representing the lowest confidence"
" and 100 the highest."
),
),
ResponseSchema(
name="reason",
type="str",
description="The reason for reassigning to the selected product "
" or for not reassigning.",
),
)
)


spam_prompt = ChatPromptTemplate(
(
@@ -134,3 +215,11 @@
("human", USER_QUESTION),
)
).partial(format_instructions=topic_parser.get_format_instructions())


product_prompt = ChatPromptTemplate(
(
("system", PRODUCT_INSTRUCTIONS),
("human", USER_QUESTION),
)
).partial(format_instructions=product_parser.get_format_instructions())
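To make the new prompt and parser concrete: the `{products}` placeholder is filled from `get_products(output_format="JSON")` with `title`/`description` entries, and `product_parser` expects the model to answer with a fenced JSON object carrying its three schema fields. A hedged example, where all values are invented and only the key names come from this diff:

```python
# Hypothetical example: product entries and the model's reply are invented;
# only the field names come from the schemas introduced in this diff.
products_json = (
    '[{"title": "Firefox", "description": "Mozilla\'s desktop web browser."},'
    ' {"title": "Thunderbird", "description": "Mozilla\'s email client."}]'
)  # roughly what get_products(output_format="JSON") is expected to supply

fence = "`" * 3  # keeps a literal markdown fence out of this example
model_reply = (
    f"{fence}json\n"
    '{"product": "Firefox", "confidence": 88,'
    ' "reason": "Describes a crash on startup of the desktop browser."}'
    f"\n{fence}"
)

parsed = product_parser.parse(model_reply)
assert parsed["product"] == "Firefox"
```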
2 changes: 1 addition & 1 deletion kitsune/products/utils.py
@@ -1,7 +1,7 @@
import json

from django.db.models import Prefetch, Q
import yaml
from django.db.models import Prefetch, Q

from kitsune.products.models import Product, Topic

88 changes: 59 additions & 29 deletions kitsune/questions/utils.py
@@ -176,41 +176,71 @@ def process_classification_result(
) -> None:
"""
Process the classification result from the LLM and take moderation action.
Handles spam, flag review, and updates to product and topic if suggested by the classifier.
"""
sumo_bot = Profile.get_sumo_bot()
action = result.get("action")
match action:
case ModerationAction.SPAM:
question.mark_as_spam(sumo_bot)
case ModerationAction.FLAG_REVIEW:

if action == ModerationAction.SPAM:
question.mark_as_spam(sumo_bot)
return
elif action == ModerationAction.FLAG_REVIEW:
flag_question(
question,
by_user=sumo_bot,
notes=(
"LLM flagged for manual review, for the following reason:\n"
f"{result.get('spam_result', {}).get('reason', '')}"
),
reason=FlaggedObject.REASON_SPAM,
)
return

product_result = result.get("product_result", {})
topic_result = result.get("topic_result", {})
new_product_title = product_result.get("product")
new_topic_title = topic_result.get("topic")

update_kwargs = {}

if (
new_product_title
and hasattr(question, "product")
and getattr(question.product, "title", None) != new_product_title
):
from kitsune.products.models import Product

try:
new_product = Product.objects.get(title=new_product_title)
except Product.DoesNotExist:
log.warning(
f"LLM suggested product '{new_product_title}' does not exist. Skipping product update."
)
else:
update_kwargs["product"] = new_product

if new_topic_title:
try:
topic = Topic.active.get(title=new_topic_title, visible=True)
except (Topic.DoesNotExist, Topic.MultipleObjectsReturned):
log.warning(
f"LLM suggested topic '{new_topic_title}' is invalid. Skipping topic update."
)
else:
update_kwargs["topic"] = topic

if update_kwargs:
question.save(**update_kwargs)
question.clear_cached_tags()
question.auto_tag()

if update_kwargs.get("topic"):
flag_question(
question,
by_user=sumo_bot,
notes=(
"LLM flagged for manual review, for the following reason:\n"
f"{result['spam_result']['reason']}"
f"LLM classified as {topic.title}, for the following reason:\n"
f"{topic_result.get('reason', '')}"
),
reason=FlaggedObject.REASON_SPAM,
status=FlaggedObject.FLAG_ACCEPTED,
)
case _:
if topic_title := result["topic_result"].get("topic"):
try:
topic = Topic.active.get(title=topic_title, visible=True)
except (Topic.DoesNotExist, Topic.MultipleObjectsReturned):
return
else:
flag_question(
question,
by_user=sumo_bot,
notes=(
"LLM classified as {topic.title}, for the following reason:\n"
f"{result['topic_result']['reason']}"
),
status=FlaggedObject.FLAG_ACCEPTED,
)
if question.topic:
question.tags.remove(question.topic.slug)
question.topic = topic
question.save()
question.tags.add(topic.slug)
question.clear_cached_tags()
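Putting the two halves of the PR together, a usage sketch. The full parameter list of `process_classification_result` is collapsed in the hunk above, so the call below assumes it takes the question plus the result dict from `classify_question`:

```python
# Illustrative wiring only: process_classification_result's full signature is
# collapsed in the hunk above, so this assumes (question, result) parameters.
from kitsune.llm.questions.classifiers import classify_question
from kitsune.questions.utils import process_classification_result

result = classify_question(question)
process_classification_result(question, result)
# SPAM        -> question.mark_as_spam(sumo_bot), nothing else
# FLAG_REVIEW -> flagged for manual review with the spam reason
# NOT_SPAM    -> suggested product/topic validated against the DB, saved via
#                question.save(**update_kwargs), and a FLAG_ACCEPTED note is
#                left when the topic came from the LLM
```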